import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# NOTE(review): `unichr` looks like a Python 2 compatibility alias rather than
# documented idna API; confirm it still imports on the installed idna version
# (the builtin `chr` is the Python 3 equivalent).
from idna import unichr

# Site root; article links found on the listing pages are resolved under it.
url = "http://ccs.scu.edu.cn/"

# Listing pages to crawl, in the order they are visited.
pageUrl = [
    "http://ccs.scu.edu.cn/zxdt/tzgg.htm",
    "http://ccs.scu.edu.cn/zxdt/tzgg/17.htm",
    "http://ccs.scu.edu.cn/zxdt/tzgg/16.htm",
]

# Accumulators filled by the crawl loops. Each must be its own list object.
title = []        # link text of each entry found on the listing pages
subUrl = []       # absolute URL of each entry's detail page
information = []  # text of the `tit-p` paragraph on each detail page
passage = []      # text of the `xwdt-artical` container on each detail page
imgUrl = []       # image URL slot, one per detail page
pagePicNum = []   # picture-count slot, one per detail page

# Counter initialised to zero; not modified by the crawl loops in this section.
picNum = 0


# Walk every listing page and harvest the link text and target of each entry.
# `title` and `subUrl` are always appended together, so index k in one
# corresponds to index k in the other.
for listing_url in pageUrl:
    # A timeout keeps a single unresponsive server from hanging the whole run.
    response = requests.get(listing_url, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    # Decode with the charset requests detects from the body itself.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    listing = soup.find('div', class_='zxdt-list')
    if listing is None:
        # No list container on this page, so there is nothing to collect.
        continue

    for entry in listing.find_all('li'):
        link = entry.find('a')
        if link is None or not link.get('href'):
            # Skip items without a usable link; appending only one of the
            # two values would knock the parallel lists out of step.
            continue
        title.append(link.get_text())
        # Resolve the href against the page it was found on. Stripping a
        # fixed three characters only yields a clean URL when the href starts
        # with exactly one "../"; urljoin handles any relative depth as well
        # as absolute links.
        subUrl.append(urljoin(listing_url, link['href']))



# Fetch every collected detail page and record its `tit-p` line and body text.
# Exactly one value is appended to each output list per URL, so
# `information`, `passage`, `imgUrl` and `pagePicNum` grow in step with the
# order of `subUrl`.
for article_url in subUrl:
    # A timeout keeps a single unresponsive server from hanging the whole run.
    response = requests.get(article_url, timeout=10)
    # Fail loudly on 4xx/5xx; otherwise an error page would be stored as an
    # empty record by the fallbacks below.
    response.raise_for_status()
    # Decode with the charset requests detects from the body itself.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    meta = soup.find('p', class_='tit-p')
    if meta is None:
        # Page has no `tit-p` paragraph: store a blank to keep lists aligned.
        information.append('')
    else:
        # Drop non-breaking spaces (U+00A0) from the extracted text.
        information.append(meta.get_text().replace('\xa0', ''))

    body = soup.find('div', class_='xwdt-artical')
    # Same fallback for a page without the article container.
    passage.append(body.get_text() if body is not None else '')

    # The site has no pictures, so store empty placeholders.
    imgUrl.append('')
    pagePicNum.append('')